#!/bin/sh
#
# Copyright 2015 Red Hat, Inc.
#
# Description:  Manages evacuation of nodes running nova-compute
#
# Authors: Andrew Beekhof
#
# Support:      openstack@lists.openstack.org
# License:      Apache Software License (ASL) 2.0
#


#######################################################################
# Initialization:

###
# Load the OCF shell function library. OCF_FUNCTIONS_DIR is only defaulted
# (to lib/heartbeat under OCF_ROOT) when the caller has not set it already.
# The expansions are quoted so a directory containing whitespace still works.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
###

# The requested action is the first command-line argument unless
# __OCF_ACTION is already set in the environment.
: "${__OCF_ACTION=$1}"

#######################################################################

# Print the OCF resource agent metadata (XML) on stdout.
#
# Every parameter carries a <content> element, and the "required" flags
# mirror what evacuate_validate enforces: auth_url, username, password and
# tenant_name are mandatory, everything else is optional.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="NovaEvacuate" version="1.0">
<version>1.0</version>

<longdesc lang="en">
Facility for tracking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged.
</longdesc>
<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc>

<parameters>

<parameter name="auth_url" unique="0" required="1">
<longdesc lang="en">
Authorization URL for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Authorization URL</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="username" unique="0" required="1">
<longdesc lang="en">
Username for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Username</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="password" unique="0" required="1">
<longdesc lang="en">
Password for connecting to keystone in admin context
</longdesc>
<shortdesc lang="en">Password</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="tenant_name" unique="0" required="1">
<longdesc lang="en">
Tenant name for connecting to keystone in admin context.
Note that with Keystone V3 tenant names are only unique within a domain.
</longdesc>
<shortdesc lang="en">Keystone v2 Tenant or v3 Project Name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="user_domain" unique="0" required="0">
<longdesc lang="en">
User's domain name. Used when authenticating to Keystone.
</longdesc>
<shortdesc lang="en">Keystone v3 User Domain</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="project_domain" unique="0" required="0">
<longdesc lang="en">
Domain name containing project. Used when authenticating to Keystone.
</longdesc>
<shortdesc lang="en">Keystone v3 Project Domain</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="endpoint_type" unique="0" required="0">
<longdesc lang="en">
Nova API location (internal, public or admin URL)
</longdesc>
<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="region_name" unique="0" required="0">
<longdesc lang="en">
Region name for connecting to nova.
</longdesc>
<shortdesc lang="en">Region name</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="insecure" unique="0" required="0">
<longdesc lang="en">
Explicitly allow client to perform "insecure" TLS (https) requests.
The server's certificate will not be verified against any certificate authorities.
This option should be used with caution.
</longdesc>
<shortdesc lang="en">Allow insecure TLS requests</shortdesc>
<content type="boolean" default="0" />
</parameter>

<parameter name="no_shared_storage" unique="0" required="0">
<longdesc lang="en">
Disable shared storage recovery for instances. Use at your own risk!
</longdesc>
<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
<content type="boolean" default="0" />
</parameter>

<parameter name="verbose" unique="0" required="0">
<longdesc lang="en">
Enable extra logging from the evacuation process
</longdesc>
<shortdesc lang="en">Enable debug logging</shortdesc>
<content type="boolean" default="0" />
</parameter>

</parameters>

<actions>
<action name="start"        timeout="20" />
<action name="stop"         timeout="20" />
<action name="monitor"      timeout="600" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data"    timeout="5" />
</actions>
</resource-agent>
END
}

#######################################################################

# SIGTERM is deliberately not fatal: the handler only logs and returns, so
# the agent keeps running after a TERM. This exists to test that lrmd itself
# makes sure the agent exits.
sigterm_handler() {
    ocf_log info "They use TERM to bring us down. No such luck."
    return
}
trap 'sigterm_handler' TERM

# Print a short usage summary, naming the script as it was invoked, on stdout.
evacuate_usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
        "" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}

# Stop action: remove the state file named by $statefile (a missing file is
# not an error because of rm -f) and always report $OCF_SUCCESS.
evacuate_stop() {
    rm -f "${statefile}"
    return ${OCF_SUCCESS}
}

# Start action: create the state file named by $statefile.
# The function's status is the exit status of touch.
evacuate_start() {
    # Do not invoke monitor here so that the start timeout can be low
    touch "${statefile}"
}

# Set the "evacuate" attribute of a node through attrd_updater.
#
# Arguments: $1 - node name
#            $2 - new attribute value
# Returns:   the exit status of attrd_updater; a warning is logged when it
#            is non-zero.
#
# Both arguments are quoted so each reaches attrd_updater as exactly one word.
update_evacuation() {
    attrd_updater -p -n evacuate -Q -N "${1}" -v "${2}"
    arc=$?
    if [ "${arc}" -ne 0 ]; then
        ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
    fi
    return "${arc}"
}

# Walk a flat list of "<node> <state>" pairs and evacuate the nodes that
# need it.
#
# Recognised states:
#   ""             nothing to do
#   no             node is fine or was already handled
#   yes            evacuation requested
#   <host>@<time>  evacuation was started by <host> at <time> (epoch seconds);
#                  it is retried once it is more than 60 seconds old
#
# Globals:   fence_options (read) - expanded unquoted on purpose so that it
#            splits into the individual fence agent arguments
# Arguments: alternating node names and states
# Returns:   always $OCF_SUCCESS; processing stops early if the in-progress
#            marker cannot be recorded through update_evacuation
handle_evacuations() {
    while [ $# -gt 0 ]; do
        node=$1
        state=$2
        shift
        # A trailing node without a state must not trigger a failing shift
        # (a fatal error in strict POSIX shells).
        if [ $# -gt 0 ]; then
            shift
        fi
        need_evacuate=0

        case $state in
            "")
                ;;
            no)
                ocf_log debug "$node is either fine or already handled"
                ;;
            yes)
                need_evacuate=1
                ;;
            *@*)
                # Split "<where>@<when>"; anything after a second '@' is ignored
                where=${state%%@*}
                when=${state#*@}
                when=${when%%@*}
                now=$(date +%s)

                case $when in
                    ""|*[!0-9]*)
                        # Without a usable timestamp the age cannot be computed;
                        # treat the marker as stale instead of aborting on an
                        # arithmetic error.
                        ocf_log warn "Invalid timestamp '$when' in evacuation state of $node, treating it as expired"
                        need_evacuate=1
                        ;;
                    *)
                        if [ $((now - when)) -gt 60 ]; then
                            ocf_log info "Processing partial evacuation of $node by $where at $when"
                            need_evacuate=1
                        else
                            # Give some time for any in-flight evacuations to either complete or fail
                            # Nova won't react well if there are two overlapping requests
                            ocf_log info "Deferring processing partial evacuation of $node by $where at $when"
                        fi
                        ;;
                esac
                ;;
        esac

        if [ "$need_evacuate" = 1 ]; then
            # Prefer fence_evacuate when it is installed
            fence_agent="fence_compute"
            if have_binary fence_evacuate; then
                fence_agent="fence_evacuate"
            fi

            ocf_log notice "Initiating evacuation of $node with $fence_agent"
            $fence_agent ${fence_options} -o status -n "${node}"
            if [ $? = 1 ]; then
                ocf_log info "Nova does not know about ${node}"
                # Dont mark as no because perhaps nova is unavailable right now
                continue
            fi

            # Record who started the evacuation and when, before starting it
            if ! update_evacuation "${node}" "$(uname -n)@$(date +%s)"; then
                return $OCF_SUCCESS
            fi

            $fence_agent ${fence_options} -o off -n "${node}"
            rc=$?

            if [ $rc = 0 ]; then
                update_evacuation "${node}" no
                ocf_log notice "Completed evacuation of $node"
            else
                ocf_log warn "Evacuation of $node failed: $rc"
                # Flag the node again so a later run retries it
                update_evacuation "${node}" yes
            fi
        fi
    done

    return $OCF_SUCCESS
}

# Monitor action.
#
# Returns $OCF_NOT_RUNNING when the state file is absent. Otherwise the output
# of "attrd_updater -n evacuate -A" is rewritten (an empty value becomes "no",
# '=' and '"' become blanks) and fields 4 and 6 of every line - presumably the
# host name and the attribute value - are handed to handle_evacuations as
# alternating arguments. Always returns $OCF_SUCCESS in that case.
evacuate_monitor() {
    [ -f "${statefile}" ] || return $OCF_NOT_RUNNING

    # The substitution is left unquoted on purpose: each whitespace-separated
    # word becomes one argument of handle_evacuations.
    handle_evacuations $(
        attrd_updater -n evacuate -A \
            | sed 's/ value=""/ value="no"/' \
            | tr '="' '  ' \
            | awk '{print $4" "$6}'
    )
    return $OCF_SUCCESS
}

# Validate the environment and build the fence agent options.
#
# Globals:   statefile, OCF_RESKEY_* (read)
#            fence_options (written) - a blank-separated option string that
#            callers expand unquoted, so parameter values must not contain
#            whitespace
# Returns:   $OCF_SUCCESS when all checks pass
# Exits:     $OCF_ERR_ARGS when the state directory is not writable;
#            $OCF_ERR_CONFIGURED when auth_url, username, password or
#            tenant_name is missing or endpoint_type is not a known value
evacuate_validate() {
    fence_options=""

    # One of the two fence agents must be available; check_binary is
    # expected to abort the agent when fence_compute is missing as well.
    if ! have_binary fence_evacuate; then
        check_binary fence_compute
    fi

    # Is the state directory writable? Probe it with a file named after our PID.
    state_dir=$(dirname "$statefile")
    if ! touch "$state_dir/$$"; then
        ocf_exit_reason "Invalid state directory: $state_dir"
        # Exit like every other validation failure so callers cannot
        # accidentally carry on after a failed check.
        exit $OCF_ERR_ARGS
    fi
    rm -f "$state_dir/$$"

    if [ -z "${OCF_RESKEY_auth_url}" ]; then
        ocf_exit_reason "auth_url not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"

    if [ -z "${OCF_RESKEY_username}" ]; then
        ocf_exit_reason "username not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -l ${OCF_RESKEY_username}"

    if [ -z "${OCF_RESKEY_password}" ]; then
        ocf_exit_reason "password not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    # NOTE(review): the password ends up on the fence agent's command line
    # and may therefore be visible in process listings.
    fence_options="${fence_options} -p ${OCF_RESKEY_password}"

    if [ -z "${OCF_RESKEY_tenant_name}" ]; then
        ocf_exit_reason "tenant_name not configured"
        exit $OCF_ERR_CONFIGURED
    fi
    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"

    # Optional string parameters are only passed on when set
    if [ -n "${OCF_RESKEY_user_domain}" ]; then
        fence_options="${fence_options} -u ${OCF_RESKEY_user_domain}"
    fi

    if [ -n "${OCF_RESKEY_project_domain}" ]; then
        fence_options="${fence_options} -P ${OCF_RESKEY_project_domain}"
    fi

    if [ -n "${OCF_RESKEY_region_name}" ]; then
        fence_options="${fence_options} --region-name ${OCF_RESKEY_region_name}"
    fi

    # Optional boolean parameters become bare flags when true
    if [ -n "${OCF_RESKEY_insecure}" ] && ocf_is_true "${OCF_RESKEY_insecure}"; then
        fence_options="${fence_options} --insecure"
    fi

    if [ -n "${OCF_RESKEY_no_shared_storage}" ] && ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
        fence_options="${fence_options} --no-shared-storage"
    fi

    if [ -n "${OCF_RESKEY_verbose}" ] && ocf_is_true "${OCF_RESKEY_verbose}"; then
        fence_options="${fence_options} --verbose"
    fi

    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
        case ${OCF_RESKEY_endpoint_type} in
            adminURL|publicURL|internalURL)
                ;;
            *)
                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
                exit $OCF_ERR_CONFIGURED
                ;;
        esac
        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
    fi

    return $OCF_SUCCESS
}

# Marker file: evacuate_start creates it, evacuate_stop removes it and
# evacuate_monitor reports "not running" while it is absent.
statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"

# Dispatch the requested action. Actions that do not exit on their own fall
# through to the common logging and exit at the bottom.
case "$__OCF_ACTION" in
    start)
        # Never start on top of a failed validation
        evacuate_validate || exit $?
        evacuate_start
        ;;
    stop)
        evacuate_stop
        ;;
    monitor)
        evacuate_validate || exit $?
        evacuate_monitor
        ;;
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        evacuate_usage
        exit $OCF_SUCCESS
        ;;
    validate-all)
        # Actually run the checks instead of reporting success blindly
        evacuate_validate
        ;;
    *)
        evacuate_usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
